Load Packages

# install.packages(c("dplyr","ggplot2","plotly","htmlwidgets","here"))
library(dplyr)
library(ggplot2)
library(plotly)
library(htmlwidgets)

Loading our Data

# read the semicolon-delimited film data from the project's Raw_Data folder
data <- read.csv(file = here::here("Raw_Data", "film.csv"), sep = ";")

Clean & Explore our Data

# take a look at the first rows (head() shows six by default);
# note that row 1 holds column type labels (INT, STRING, CAT, ...), not data
head(data)
##   Year Length                   Title Subject               Actor
## 1  INT    INT                  STRING     CAT                 CAT
## 2 1990    111 Tie Me Up! Tie Me Down!  Comedy   Banderas, Antonio
## 3 1991    113              High Heels  Comedy        Bosé, Miguel
## 4 1983    104          Dead Zone, The  Horror Walken, Christopher
## 5 1979    122                    Cuba  Action       Connery, Sean
## 6 1978     94          Days of Heaven   Drama       Gere, Richard
##           Actress          Director Popularity Awards          X.Image
## 1             CAT               CAT        INT   BOOL           STRING
## 2 Abril, Victoria  Almodóvar, Pedro         68     No NicholasCage.png
## 3 Abril, Victoria  Almodóvar, Pedro         68     No NicholasCage.png
## 4   Adams, Brooke Cronenberg, David         79     No NicholasCage.png
## 5   Adams, Brooke   Lester, Richard          6     No  seanConnery.png
## 6   Adams, Brooke  Malick, Terrence         14     No NicholasCage.png
# drop the type-label row (the one whose Year is "INT") and the X.Image column
clean_data <- data %>%
  filter(Year != "INT") %>%
  select(-X.Image)
# check if it worked: the cleaned data should have one fewer row
# and one fewer column than the raw data
dim(data)
## [1] 1660   10
dim(clean_data)
## [1] 1659    9
# check class types for each column
# (every column comes back as character; presumably the type-label row
# made read.csv parse the numeric columns as text)
lapply(clean_data,class)
## $Year
## [1] "character"
## 
## $Length
## [1] "character"
## 
## $Title
## [1] "character"
## 
## $Subject
## [1] "character"
## 
## $Actor
## [1] "character"
## 
## $Actress
## [1] "character"
## 
## $Director
## [1] "character"
## 
## $Popularity
## [1] "character"
## 
## $Awards
## [1] "character"
# change class types: convert the numeric columns from character to integer.
# Columns are picked by name rather than by position so that a change in
# column order cannot silently convert the wrong columns.
clean_data[c("Year", "Length", "Popularity")] <-
  lapply(clean_data[c("Year", "Length", "Popularity")], as.integer)

# check class types to see if it worked:
# Year, Length and Popularity should now be integer
lapply(clean_data,class)
## $Year
## [1] "integer"
## 
## $Length
## [1] "integer"
## 
## $Title
## [1] "character"
## 
## $Subject
## [1] "character"
## 
## $Actor
## [1] "character"
## 
## $Actress
## [1] "character"
## 
## $Director
## [1] "character"
## 
## $Popularity
## [1] "integer"
## 
## $Awards
## [1] "character"
# mark every cell that holds an empty string as missing (NA)
is.na(clean_data) <- clean_data == ""

# check if it worked: count non-missing (FALSE) vs missing (TRUE) cells
# across the whole data frame
table(is.na(clean_data))
## 
## FALSE  TRUE 
## 14217   714

F1. Does the proportion of movies made in each subject change over time?

# F1: for every year, the share (in %) of that year's films in each subject.
# Rows with a missing Year or Subject are dropped first.
F1 <- clean_data %>%
  filter(!is.na(Year), !is.na(Subject)) %>%
  group_by(Year, Subject) %>%
  # state the regrouping explicitly: keep Year (drop only Subject) so that
  # sum(cat_n) below is the yearly total and the shares add up to 100 per year
  summarise(cat_n = n(), .groups = "drop_last") %>%
  mutate(prop_cat = (cat_n / sum(cat_n)) * 100) %>%
  # return a plain, ungrouped table so later steps are not silently per-year
  ungroup()

F2. Compare the popularity score for movies that did and did not receive awards.

# F2: keep only films that have both an Awards value and a Popularity score
F2 <- clean_data %>%
  filter(!is.na(Awards), !is.na(Popularity))

Visualizing our Data

Static Visualizations: ggplot2

# make our figure 1: stacked bars of each genre's share of films per year
Fig1 <- ggplot(F1, aes(x = Year, y = prop_cat, fill = Subject)) +
  # prop_cat is already a per-year percentage, so stack it as-is;
  # position = "fill" would rescale the axis to 0-1 and contradict the
  # "% of Movies" label
  geom_col(position = "stack") +
  labs(
    y = "% of Movies",
    fill = "Genre",
    title = "Proportion of Movie Genres Over Time"
  )
Fig1

# make our figure 2: popularity scores for films with vs without awards
Fig2 <- ggplot(F2, aes(x = Awards, y = Popularity, fill = Awards)) +
  geom_boxplot() +
  labs(
    x = "Whether or Not Movie Received Awards",
    y = "Popularity Score",
    title = "Critical Praise vs General Popularity for Movies"
  )
Fig2

Interactive Visualizations: plotly

# making Figure 1 interactive: the extra `text` aesthetic carries the
# hover label that plotly will display
Fig1i <- ggplot(
  F1,
  aes(
    x = Year,
    y = prop_cat,
    fill = Subject,
    text = paste(
      "Year: ", Year, "\n",
      "% of Movies: ", round(prop_cat), "\n",
      "Genre: ", Subject, "\n"
    )
  )
) +
  geom_bar(stat = "identity", position = "fill") +
  labs(
    y = "Proportion of Movies",
    fill = "Genre",
    title = "Proportion of Movie Genres Over Time"
  )

# convert to a plotly widget whose tooltip shows only the custom text
Fig1i <- ggplotly(p = Fig1i, tooltip = "text")

Fig1i

# making Figure 2 interactive with plotly's default tooltip
Fig2i <- ggplotly(p = Fig2)

Fig2i

Saving Figures

# save as static image files; each plot is passed explicitly because ggsave()
# defaults to last_plot(), which would write the same figure to both files
ggsave(here::here("Figures", "F1.png"), plot = Fig1)
ggsave(here::here("Figures", "F2.png"), plot = Fig2)

# save as interactive html files
htmltools::save_html(Fig1i, file = here::here("Figures", "Fig1i.html"))
htmltools::save_html(Fig2i, file = here::here("Figures", "Fig2i.html"))

# save as r data objects to embed in other R products (xaringan slides)
save(Fig1i, file = here::here("Figures", "Fig1i.rda"))
save(Fig2i, file = here::here("Figures", "Fig2i.rda"))